{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 统计总分大于班级平均分的学生"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>clazz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>施笑槐</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100002</td>\n",
       "      <td>吕金鹏</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id name  age sex clazz\n",
       "0  1500100001  施笑槐   22   女  文科六班\n",
       "1  1500100002  吕金鹏   24   男  文科六班"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取学生表\n",
    "import pandas as pd\n",
    "students = pd.read_csv(\n",
    "    \"../data/students.txt\",\n",
    "    sep=\",\",\n",
    "    header=None,\n",
    "    names=[\"id\", \"name\", \"age\", \"sex\", \"clazz\"], encoding=\"UTF-8\"\n",
    ")\n",
    "students.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sid</th>\n",
       "      <th>cid</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000001</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000002</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          sid      cid  score\n",
       "0  1500100001  1000001     98\n",
       "1  1500100001  1000002      5"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取分数表\n",
    "scores = pd.read_csv(\n",
    "    \"../data/score.txt\",\n",
    "    sep=\",\",\n",
    "    header=None,\n",
    "    names=[\"sid\", \"cid\", \"score\"], encoding=\"UTF-8\"\n",
    ")\n",
    "scores.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>clazz</th>\n",
       "      <th>sid</th>\n",
       "      <th>cid</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>施笑槐</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000001</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>施笑槐</td>\n",
       "      <td>22</td>\n",
       "      <td>女</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>1500100001</td>\n",
       "      <td>1000002</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id name  age sex clazz         sid      cid  score\n",
       "0  1500100001  施笑槐   22   女  文科六班  1500100001  1000001     98\n",
       "1  1500100001  施笑槐   22   女  文科六班  1500100001  1000002      5"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 1、关联学生表和分数表\n",
    "# merge: 当字段名不一致时，通过指点左右两边的字段进行关联\n",
    "join_df = pd.merge(students, scores, left_on=\"id\", right_on=\"sid\")\n",
    "join_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>clazz</th>\n",
       "      <th>sum_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>406</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100002</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id clazz  sum_score\n",
       "0  1500100001  文科六班        406\n",
       "1  1500100002  文科六班        440"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算学生的总分\n",
    "# reset_index: 重置索引\n",
    "# rename: 修改列表\n",
    "stu_sum_score = join_df\\\n",
    "    .groupby([\"id\", \"clazz\"])[\"score\"]\\\n",
    "    .sum()\\\n",
    "    .reset_index()\\\n",
    "    .rename(columns={\"score\": \"sum_score\"})\n",
    "\n",
    "stu_sum_score.head(2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>clazz</th>\n",
       "      <th>mean_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>文科一班</td>\n",
       "      <td>377.944444</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>文科三班</td>\n",
       "      <td>383.670213</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  clazz  mean_score\n",
       "0  文科一班  377.944444\n",
       "1  文科三班  383.670213"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算班级总分的平均分\n",
    "clazz_mean_score = stu_sum_score\\\n",
    "    .groupby(\"clazz\")[\"sum_score\"].mean()\\\n",
    "    .reset_index()\\\n",
    "    .rename(columns={\"sum_score\":\"mean_score\"})\n",
    "clazz_mean_score.head(2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>clazz</th>\n",
       "      <th>sum_score</th>\n",
       "      <th>mean_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>406</td>\n",
       "      <td>370.240385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100002</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>440</td>\n",
       "      <td>370.240385</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id clazz  sum_score  mean_score\n",
       "0  1500100001  文科六班        406  370.240385\n",
       "1  1500100002  文科六班        440  370.240385"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 关联获取班级平均分\n",
    "stu_clazz_score = pd.merge(stu_sum_score, clazz_mean_score, on=\"clazz\")\n",
    "stu_clazz_score.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>clazz</th>\n",
       "      <th>sum_score</th>\n",
       "      <th>mean_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1500100001</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>406</td>\n",
       "      <td>370.240385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1500100002</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>440</td>\n",
       "      <td>370.240385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1500100007</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>418</td>\n",
       "      <td>370.240385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1500100018</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>425</td>\n",
       "      <td>370.240385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1500100039</td>\n",
       "      <td>文科六班</td>\n",
       "      <td>503</td>\n",
       "      <td>370.240385</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id clazz  sum_score  mean_score\n",
       "0  1500100001  文科六班        406  370.240385\n",
       "1  1500100002  文科六班        440  370.240385\n",
       "2  1500100007  文科六班        418  370.240385\n",
       "3  1500100018  文科六班        425  370.240385\n",
       "5  1500100039  文科六班        503  370.240385"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 取出总分大于平均分的学生\n",
    "sum_mean_score = stu_clazz_score[stu_clazz_score[\"sum_score\"] > stu_clazz_score[\"mean_score\"]]\n",
    "sum_mean_score.head()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 统计总分前十的学生"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sid\n",
       "1500100001    406\n",
       "1500100002    440\n",
       "Name: score, dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算学生的总分\n",
    "sum_score = scores.groupby(\"sid\")[\"score\"].sum()\n",
    "sum_score.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sid\n",
       "1500100929    630\n",
       "1500100080    628\n",
       "1500100308    628\n",
       "1500100873    612\n",
       "1500100418    611\n",
       "1500100258    604\n",
       "1500100875    595\n",
       "1500100930    589\n",
       "1500100596    587\n",
       "1500100834    586\n",
       "Name: score, dtype: int64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 按照总分降序排序,取前十\n",
    "# sort_values:对value排序\n",
    "top10_sco = sum_score.sort_values(ascending=False).head(10)\n",
    "top10_sco"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>clazz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>1500100298</td>\n",
       "      <td>班冬萱</td>\n",
       "      <td>21</td>\n",
       "      <td>女</td>\n",
       "      <td>文科三班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>299</th>\n",
       "      <td>1500100300</td>\n",
       "      <td>吴采波</td>\n",
       "      <td>21</td>\n",
       "      <td>女</td>\n",
       "      <td>理科二班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>298</th>\n",
       "      <td>1500100299</td>\n",
       "      <td>祝涵阳</td>\n",
       "      <td>21</td>\n",
       "      <td>女</td>\n",
       "      <td>文科四班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>799</th>\n",
       "      <td>1500100800</td>\n",
       "      <td>终飞昂</td>\n",
       "      <td>21</td>\n",
       "      <td>男</td>\n",
       "      <td>文科三班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>804</th>\n",
       "      <td>1500100805</td>\n",
       "      <td>戚鸿志</td>\n",
       "      <td>21</td>\n",
       "      <td>男</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>759</th>\n",
       "      <td>1500100760</td>\n",
       "      <td>翁绮彤</td>\n",
       "      <td>24</td>\n",
       "      <td>女</td>\n",
       "      <td>文科五班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>758</th>\n",
       "      <td>1500100759</td>\n",
       "      <td>洪昂然</td>\n",
       "      <td>24</td>\n",
       "      <td>男</td>\n",
       "      <td>理科一班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>353</th>\n",
       "      <td>1500100354</td>\n",
       "      <td>洪尔容</td>\n",
       "      <td>24</td>\n",
       "      <td>女</td>\n",
       "      <td>文科三班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>768</th>\n",
       "      <td>1500100769</td>\n",
       "      <td>幸映安</td>\n",
       "      <td>24</td>\n",
       "      <td>女</td>\n",
       "      <td>文科六班</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>1500100500</td>\n",
       "      <td>步冰蝶</td>\n",
       "      <td>24</td>\n",
       "      <td>女</td>\n",
       "      <td>理科一班</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             id name  age sex clazz\n",
       "297  1500100298  班冬萱   21   女  文科三班\n",
       "299  1500100300  吴采波   21   女  理科二班\n",
       "298  1500100299  祝涵阳   21   女  文科四班\n",
       "799  1500100800  终飞昂   21   男  文科三班\n",
       "804  1500100805  戚鸿志   21   男  文科六班\n",
       "..          ...  ...  ...  ..   ...\n",
       "759  1500100760  翁绮彤   24   女  文科五班\n",
       "758  1500100759  洪昂然   24   男  理科一班\n",
       "353  1500100354  洪尔容   24   女  文科三班\n",
       "768  1500100769  幸映安   24   女  文科六班\n",
       "499  1500100500  步冰蝶   24   女  理科一班\n",
       "\n",
       "[1000 rows x 5 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df可以指定一个字段进行排序\n",
    "students.sort_values(by=\"age\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "2c4166c4e60205886b39594020a0a92964d0c5302c672db710129f3f2bb29b9f"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
